# Analysis date: 2023-08-11
# CRC_Xenografts_Batch2_DataProcessing Script
# Restore the cached workspace written by an earlier step of the pipeline.
# NOTE(review): presumably this is where pY_Set3_form and pST_Set3_form come
# from -- confirm against the script that writes the cache.
load("../Data/Cache/Xenografts_Batch2_DataProcessing.RData")
# Fix the RNG state so the randomly initialised k-means runs further down are
# reproducible. The seed is set before the source() calls, so any random draws
# made while sourcing would also shift the downstream random stream.
set.seed(2023)
# Shared analysis helpers. The tidyverse attach/conflict messages that follow
# are printed right after this call.
source("../../../General/Code/Analysis_Functions.R")
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ lubridate 1.9.2 ✔ tibble 3.2.1
## ✔ purrr 1.0.1 ✔ tidyr 1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::%within%() masks IRanges::%within%()
## ✖ dplyr::collapse() masks IRanges::collapse()
## ✖ dplyr::combine() masks Biobase::combine(), BiocGenerics::combine()
## ✖ dplyr::count() masks matrixStats::count()
## ✖ dplyr::desc() masks IRanges::desc()
## ✖ tidyr::expand() masks S4Vectors::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first() masks S4Vectors::first()
## ✖ dplyr::lag() masks stats::lag()
## ✖ ggplot2::Position() masks BiocGenerics::Position(), base::Position()
## ✖ purrr::reduce() masks GenomicRanges::reduce(), IRanges::reduce()
## ✖ dplyr::rename() masks S4Vectors::rename()
## ✖ lubridate::second() masks S4Vectors::second()
## ✖ lubridate::second<-() masks S4Vectors::second<-()
## ✖ dplyr::select() masks AnnotationDbi::select()
## ✖ dplyr::slice() masks IRanges::slice()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Batch-specific helper definitions for this analysis.
# NOTE(review): KMeans_Find_Nr_Clusters_elbow(), Plot_StringDB() and PGPalette
# are used below but not defined in this script -- presumably they come from
# this file or from Analysis_Functions.R; confirm.
source("CRC_Xenografts_Batch2_Functions.R")
# Input matrix for k-means on the pY data: one row per peptide, with row names
# of the form "<HGNC_Symbol>_<Annotated_Sequence>", and one column per column
# of pY_Set3_form whose name contains "log2FC".
#
# contains() is itself a tidyselect helper, so it is handed to select()
# directly; all_of() is intended for external character vectors of names.
mat_kmean_pY <- pY_Set3_form %>%
  mutate(peptide = paste0(HGNC_Symbol, "_", Annotated_Sequence)) %>%
  # Row names must be unique, so this step fails on duplicated peptide labels.
  column_to_rownames("peptide") %>%
  select(contains("log2FC")) %>%
  as.matrix()
# Long-format companion of mat_kmean_pY: one row per peptide x sample column,
# used later for plotting and for attaching the cluster labels.
kmeans_tb_pY <- pY_Set3_form %>%
  # contains() is used directly; wrapping it in all_of() is unnecessary.
  select(HGNC_Symbol, Annotated_Sequence, contains("log2FC")) %>%
  pivot_longer(
    contains("log2FC"),
    names_to = "sample",
    values_to = "log2FC"
  ) %>%
  # Split the sample column name on "_" into six fields. The first field is
  # stored in a column literally called "remove" (presumably the "log2FC"
  # prefix -- confirm against the column names).
  separate(
    col = sample,
    sep = "_",
    into = c(
      "remove", "xenograft", "treatment",
      "timepoint", "replicate", "set"
    ),
    # Keep the original sample column; it is the x-axis of the cluster plot.
    remove = FALSE
  ) %>%
  # Same peptide label as the row names of mat_kmean_pY; used as the join key.
  mutate(peptide = paste0(HGNC_Symbol, "_", Annotated_Sequence))
# Elbow diagnostic (project helper) used to pick the number of clusters.
# It stays ahead of kmeans() so the order of calls between set.seed() and the
# clustering is unchanged.
KMeans_Find_Nr_Clusters_elbow(mat_kmean_pY, c_max = 30)

# k-means on the peptide x sample matrix: 6 centres, best of 200 random starts.
# Cluster numbers are arbitrary labels that depend on the RNG state, and the
# hard-coded cluster number used further down relies on them.
pY_kmeans <- kmeans(mat_kmean_pY, centers = 6, nstart = 200, iter.max = 10)

# One row per peptide: cluster label plus the peptide name taken from the
# names of the cluster vector (the row names of the input matrix).
cluster_df_pY <- tibble(
  "cluster" = pY_kmeans$cluster,
  peptide = names(pY_kmeans$cluster)
)

# Attach the cluster label to every long-format row. The key is spelled out
# so the join does not silently depend on whichever columns happen to be shared.
kmeans_tb_pY <- left_join(kmeans_tb_pY, cluster_df_pY, by = "peptide")
# Per-cluster profile plot for the pY data: one line per clustered peptide
# across the sample columns, one facet per k-means cluster, points coloured by
# treatment.
kmeans_tb_pY %>%
  # Group by the peptide label (symbol + sequence), which is the unit that was
  # clustered. Grouping by Annotated_Sequence alone would draw a single line
  # through two peptides that share a sequence but differ in HGNC_Symbol.
  ggplot(aes(x = sample, y = log2FC, group = peptide)) +
  geom_line(alpha = 0.2) +
  geom_point(aes(color = treatment), size = 0.2) +
  facet_wrap(~cluster) +
  theme_bw() +
  # Sample names are long; rotate them so they stay readable.
  theme(axis.text.x = element_text(angle = 90)) +
  scale_color_manual(values = PGPalette[c(1, 2, 4, 5)])
# Interactive table with one row per distinct symbol / sequence / cluster
# combination and column filters placed above the table.
kmeans_tb_pY %>%
  select(HGNC_Symbol, Annotated_Sequence, cluster) %>%
  unique() %>%
  DT::datatable(filter = "top")
# STRING network (project helper Plot_StringDB) for the distinct gene symbols
# of the peptides assigned to pY cluster 3. The cluster number is hard-coded
# and refers to the labels produced by the k-means run above.
message("Cluster 3")
## Cluster 3
Plot_StringDB(
  kmeans_tb_pY %>%
    filter(cluster == 3) %>%
    select(HGNC_Symbol) %>%
    unique()
)
# Input matrix for k-means on the pST data: one row per peptide, with row
# names of the form "<HGNC_Symbol>_<Annotated_Sequence>", and one column per
# column of pST_Set3_form whose name contains "log2FC".
#
# contains() is itself a tidyselect helper, so it is handed to select()
# directly; all_of() is intended for external character vectors of names.
mat_kmean_pST <- pST_Set3_form %>%
  mutate(peptide = paste0(HGNC_Symbol, "_", Annotated_Sequence)) %>%
  # Row names must be unique, so this step fails on duplicated peptide labels.
  column_to_rownames("peptide") %>%
  select(contains("log2FC")) %>%
  as.matrix()
# Long-format companion of mat_kmean_pST: one row per peptide x sample column,
# used later for plotting and for attaching the cluster labels.
kmeans_tb_pST <- pST_Set3_form %>%
  # contains() is used directly; wrapping it in all_of() is unnecessary.
  select(HGNC_Symbol, Annotated_Sequence, contains("log2FC")) %>%
  pivot_longer(
    contains("log2FC"),
    names_to = "sample",
    values_to = "log2FC"
  ) %>%
  # Split the sample column name on "_" into six fields. The first field is
  # stored in a column literally called "remove" (presumably the "log2FC"
  # prefix -- confirm against the column names).
  separate(
    col = sample,
    sep = "_",
    into = c(
      "remove", "xenograft", "treatment",
      "timepoint", "replicate", "set"
    ),
    # Keep the original sample column; it is the x-axis of the cluster plot.
    remove = FALSE
  ) %>%
  # Same peptide label as the row names of mat_kmean_pST; used as the join key.
  mutate(peptide = paste0(HGNC_Symbol, "_", Annotated_Sequence))
# Elbow diagnostic (project helper) used to pick the number of clusters.
# It stays ahead of kmeans() so the order of calls between set.seed() and the
# clustering is unchanged.
KMeans_Find_Nr_Clusters_elbow(mat_kmean_pST, c_max = 30)

# k-means on the peptide x sample matrix: 3 centres, best of 200 random starts.
# Cluster numbers are arbitrary labels that depend on the RNG state.
pST_kmeans <- kmeans(mat_kmean_pST, centers = 3, nstart = 200, iter.max = 10)

# One row per peptide: cluster label plus the peptide name taken from the
# names of the cluster vector (the row names of the input matrix).
cluster_df_pST <- tibble(
  "cluster" = pST_kmeans$cluster,
  peptide = names(pST_kmeans$cluster)
)

# Attach the cluster label to every long-format row. The key is spelled out
# so the join does not silently depend on whichever columns happen to be shared.
kmeans_tb_pST <- left_join(kmeans_tb_pST, cluster_df_pST, by = "peptide")
# Per-cluster profile plot for the pST data: one line per clustered peptide
# across the sample columns, one facet per k-means cluster (three facets per
# row), points coloured by treatment.
kmeans_tb_pST %>%
  # Group by the peptide label (symbol + sequence), which is the unit that was
  # clustered. Grouping by Annotated_Sequence alone would draw a single line
  # through two peptides that share a sequence but differ in HGNC_Symbol.
  ggplot(aes(x = sample, y = log2FC, group = peptide)) +
  geom_line(alpha = 0.2) +
  geom_point(aes(color = treatment), size = 0.2) +
  facet_wrap(~cluster, ncol = 3) +
  theme_bw() +
  # Sample names are long; rotate them so they stay readable.
  theme(axis.text.x = element_text(angle = 90)) +
  scale_color_manual(values = PGPalette[c(1, 2, 4, 5)])
# Interactive table with one row per distinct symbol / sequence / cluster
# combination and column filters placed above the table.
kmeans_tb_pST %>%
  select(HGNC_Symbol, Annotated_Sequence, cluster) %>%
  unique() %>%
  DT::datatable(filter = "top")
# Record the R version, platform and attached/loaded package versions used for
# this run; the captured output follows.
sessionInfo()
## R version 4.2.3 (2023-03-15)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur ... 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] lubridate_1.9.2 forcats_1.0.0
## [3] stringr_1.5.0 dplyr_1.1.2
## [5] purrr_1.0.1 readr_2.1.4
## [7] tidyr_1.3.0 tibble_3.2.1
## [9] tidyverse_2.0.0 cluster_2.1.4
## [11] factoextra_1.0.7 ggplot2_3.4.2
## [13] mdatools_0.14.0 SummarizedExperiment_1.28.0
## [15] GenomicRanges_1.50.2 GenomeInfoDb_1.34.9
## [17] MatrixGenerics_1.10.0 matrixStats_1.0.0
## [19] org.Hs.eg.db_3.16.0 AnnotationDbi_1.60.2
## [21] IRanges_2.32.0 S4Vectors_0.36.2
## [23] Biobase_2.58.0 BiocGenerics_0.44.0
## [25] fgsea_1.24.0
##
## loaded via a namespace (and not attached):
## [1] STRINGdb_2.10.1 bitops_1.0-7 bit64_4.0.5
## [4] RColorBrewer_1.1-3 httr_1.4.6 backports_1.4.1
## [7] tools_4.2.3 bslib_0.5.0 DT_0.28
## [10] utf8_1.2.3 R6_2.5.1 KernSmooth_2.23-22
## [13] DBI_1.1.3 colorspace_2.1-0 withr_2.5.0
## [16] tidyselect_1.2.0 bit_4.0.5 compiler_4.2.3
## [19] chron_2.3-61 cli_3.6.1 DelayedArray_0.24.0
## [22] labeling_0.4.2 sass_0.4.7 caTools_1.18.2
## [25] scales_1.2.1 digest_0.6.33 rmarkdown_2.23
## [28] XVector_0.38.0 pkgconfig_2.0.3 htmltools_0.5.5
## [31] plotrix_3.8-2 highr_0.10 fastmap_1.1.1
## [34] htmlwidgets_1.6.2 rlang_1.1.1 rstudioapi_0.15.0
## [37] RSQLite_2.3.1 farver_2.1.1 jquerylib_0.1.4
## [40] generics_0.1.3 jsonlite_1.8.7 crosstalk_1.2.0
## [43] BiocParallel_1.32.6 gtools_3.9.4 RCurl_1.98-1.12
## [46] magrittr_2.0.3 GenomeInfoDbData_1.2.9 Matrix_1.6-0
## [49] Rcpp_1.0.11 munsell_0.5.0 fansi_1.0.4
## [52] proto_1.0.0 lifecycle_1.0.3 sqldf_0.4-11
## [55] stringi_1.7.12 yaml_2.3.7 zlibbioc_1.44.0
## [58] gplots_3.1.3 plyr_1.8.8 grid_4.2.3
## [61] blob_1.2.4 parallel_4.2.3 ggrepel_0.9.3
## [64] crayon_1.5.2 lattice_0.21-8 Biostrings_2.66.0
## [67] cowplot_1.1.1 hash_2.2.6.2 hms_1.1.3
## [70] KEGGREST_1.38.0 knitr_1.43 pillar_1.9.0
## [73] igraph_1.5.0.1 codetools_0.2-19 fastmatch_1.1-3
## [76] glue_1.6.2 evaluate_0.21 data.table_1.14.8
## [79] tzdb_0.4.0 png_0.1-8 vctrs_0.6.3
## [82] gtable_0.3.3 gsubfn_0.7 cachem_1.0.8
## [85] xfun_0.39 broom_1.0.5 memoise_2.0.1
## [88] timechange_0.2.0 ellipsis_0.3.2
# Stop knitting here: anything placed after this call in the source document
# is left out of the rendered report.
knitr::knit_exit()